In [ ]:
%run "../Functions/2. Game sessions.ipynb"
In [ ]:
import unidecode
In [ ]:
# Quick check of unidecode on a French word containing an accented character.
accented_string = "Enormément"
# accented_string is a regular (unicode) Python 3 str holding the accented 'é'
unaccented_string = unidecode.unidecode(accented_string)
unaccented_string
# unaccented_string is expected to be the ASCII transliteration 'Enormement', still a 'str'
In [ ]:
# Exploratory, inlined body of getUserSessions( _rmDF, userId ):
# the distinct, non-null session ids found on the rows of one user.
_rmDF = rmdf152
userId = 'e8fed737-7c65-49c8-bf84-f8ae71c094f8'
#userId = getRandomRedMetricsGUID(_rmdf)
#def getUserSessions( _rmDF, userId):
# Select rows and column in a single .loc call instead of chained indexing.
_isRequestedUser = _rmDF['userId'] == userId
result = (
    _rmDF.loc[_isRequestedUser, 'sessionId']
    .drop_duplicates()
    .dropna(how='any')
)
result
In [ ]:
# Pick one entry of `result` at a random position.
# randint is inclusive on both bounds, hence len(result)-1; an empty `result`
# would turn this into randint(0, -1), which raises ValueError.
# NOTE(review): within this notebook randint is only imported in a later cell -
# presumably the %run'd functions notebook already provides it; confirm.
_sessionIndex = randint(0,len(result)-1)
_guid = result.iloc[_sessionIndex]
_guid
In [ ]:
# Echo the user id used to filter the sessions.
userId
In [ ]:
# Display the French translation table, transposed.
questionsAnswersTranslationsFR.T
In [ ]:
# Look up the translation entry stored under this question label.
questionsAnswersTranslationsFR.loc["Are you interested in video games?"]
In [ ]:
# Look up the translation entry stored under this question label.
questionsAnswersTranslationsFR.loc["Do you play video games?"]
In [ ]:
localizedFormFR = gformFR
# Exploratory, inlined body of getTranslatedForm: builds an English-indexed,
# English-localized answer dataframe from a French-indexed, French-localized one.
#def getTranslatedForm( localizedFormFR ):
result = localizedFormFR.copy()

# Translate the answers, one cell at a time.
for question in result.columns:
    for index in result.index:
        answer = result.loc[index, question]
        _answerTranslations = questionsAnswersTranslationsFR.loc[question]
        # Questions with no registered answer translation are left untouched.
        if 0 == len(_answerTranslations):
            continue
        if answer in _answerTranslations:
            result.loc[index, question] = _answerTranslations[answer]
        else:
            # Answer has no known translation: dump the context for inspection.
            # NOTE(review): the export lost indentation; attaching this `else`
            # to the membership test (not to the length test) is inferred.
            print(question)
            #print(index)
            print(answer)
            print(_answerTranslations)
            print()
            print()
            print()

# Translate the questions: columns are paired positionally with gformEN's columns.
_frToEnColumns = dict(zip(localizedFormFR.columns, gformEN.columns))
result = result.rename(columns=_frToEnColumns)
result.T
In [ ]:
# Number of entries stored for 'Timestamp'; the translation loop only
# translates questions for which this length is non-zero.
len(questionsAnswersTranslationsFR.loc['Timestamp'])
In [ ]:
# Second row (.iloc[1]) of the translated French form.
# NOTE(review): getTranslatedForm is not defined in this notebook - presumably it
# comes from the %run'd functions notebook; confirm it accepts the translation
# table as its second argument.
getTranslatedForm( gformFR, questionsAnswersTranslationsFR ).iloc[1]
In [ ]:
# Draw a random user id from rmdf152 until one satisfies isGUIDFormat.
from random import randint
uniqueUsers = rmdf152['userId'].dropna().unique()
userCount = len(uniqueUsers)
testlocalplayerguid = '0'
# Without at least one GUID-formatted id the rejection loop below would never
# terminate (or randint(0, -1) would fail cryptically when there is no user at all),
# so fail early with an explicit message.
if not any(isGUIDFormat(guid) for guid in uniqueUsers):
    raise ValueError("no GUID-formatted userId found in rmdf152")
# Rejection sampling: '0' is a placeholder that forces the first draw.
while (not isGUIDFormat(testlocalplayerguid)):
    # randint is inclusive on both bounds, hence userCount-1
    userIndex = randint(0,userCount-1)
    testlocalplayerguid = uniqueUsers[userIndex]
testlocalplayerguid
In [ ]:
# Number of distinct session ids in rmdf152 (nunique() ignores NaN by default).
sessionscount = rmdf152["sessionId"].nunique()
sessionscount
In [ ]:
# Distinct values found in the 'customData.platform' column (NaN included, if any).
platforms = rmdf152["customData.platform"].unique()
platforms
print("part100="+str(part100.head(1))) print("part131="+str(part131.head(1))) print("part132="+str(part132.head(1))) print("part133="+str(part133.head(1))) print("part140="+str(part140.head(1))) print("part150="+str(part150.head(1))) print("part151="+str(part151.head(1))) print("part152="+str(part152.head(1))) print("df="+str(df.head(1)))
In [ ]:
# User id used by the test cells below. The literal itself contains the
# surrounding double quotes.
# NOTE(review): other cells use the unquoted form of a user id - confirm which
# format rmdf152['userId'] actually holds.
testGUID = '"4dbc2f43-421c-4e23-85d4-f17723ff8c66"'
In [ ]:
# Session count for the test user.
# NOTE(review): `includewithoutusers` is not passed in this call, so the helper's
# default applies; per the original note, includewithoutusers=True would also
# count sessions that do not have any userId attached - confirm against getSessionsCount.
getSessionsCount( rmdf152, testGUID)
print("part100="+str(part100.columns)) print("part131="+str(part131.columns)) print("part132="+str(part132.columns)) print("part133="+str(part133.columns)) print("part140="+str(part140.columns)) print("part150="+str(part150.columns)) print("part151="+str(part151.columns)) print("part152="+str(part152.columns))
print("dfconcat="+str(dfconcat.columns))
print("df="+str(df.columns))
df.columns
In [ ]:
# Sessions of the test user, obtained through the getUserSessions helper.
sessionsList = getUserSessions(rmdf152, testGUID)
sessionsList
In [ ]:
# Manual variant: keep only 'start' events, then the rows of the test user.
sessionsList = rmdf152[rmdf152['type']=='start']
# Name the axis via `columns=`: passing it positionally (drop('type', 1)) is
# deprecated since pandas 1.3 and rejected by pandas 2.x.
sessionsList = sessionsList.drop(columns='type')
# Discard every row that still has a missing value in any remaining column.
sessionsList = sessionsList.dropna(how='any')
userSessionsList = sessionsList[sessionsList['userId']==testGUID]
userSessionsList
In [ ]:
# Count the test user's sessions: first dimension of what getUserSessions returns.
#print(testGUID)
sessionsList = getUserSessions(rmdf152, testGUID)
#sessionsList = getAllSessions(rmdf152, testGUID.replace('"',''))
#print(type(sessionsList))
sessionsList.shape[0]
In [ ]:
# One row per distinct (userId, sessionId) pair found in rmdf152.
_userSessionColumns = ['userId', 'sessionId']
allSessions = rmdf152[_userSessionColumns].drop_duplicates()
allSessions.head()
In [ ]:
# Ten users with the most distinct sessions, largest count first.
_sessionsPerUser = allSessions.groupby('userId').size().reset_index(name='counts')
_sessionsPerUser.sort_values(by='counts', ascending=False).head(10)
In [ ]:
#getUserSessionsCounts(getNormalizedRedMetricsCSV(part152)).head(10)
In [ ]:
# Same per-user count through agg(['count']); only the first rows are shown
# and, with the sort commented out, they are not ordered by count.
allSessions.groupby('userId').agg(['count']).head() #.sort_values(by='sessionId', ascending=False).head(10)
In [ ]:
#df2 = pd.concat([df151, rmdf152])
#df2.head(2)
#print(df2.columns)
#df2columns = df2.columns.values
#type(df2columns)
#df2columns
#newColumns = np.concatenate((minimalInitializationColumns, df2columns))
#newColumns
#df2 = getNormalizedRedMetricsCSV(df)
In [ ]:
# Call the helper with its default arguments.
# NOTE(review): getRandomSessionGUID is defined outside this notebook.
getRandomSessionGUID()
In [ ]:
# Compare container types: the unique() result on the userId column versus what
# getUserSessions returns (queried here with the double-quoted form of the id).
_userId = '"e8fed737-7c65-49c8-bf84-f8ae71c094f8"'
type(rmdf152['userId'].dropna().unique()), type(getUserSessions( rmdf152, _userId ))
In [ ]:
# Sessions of the same user, queried with the unquoted form of the id.
_userId = 'e8fed737-7c65-49c8-bf84-f8ae71c094f8'
_uniqueSessions = getUserSessions( rmdf152, _userId )
# Not displayed: a notebook cell only renders its last expression.
len(_uniqueSessions)
_uniqueSessions
In [ ]:
# Exploratory, inlined body of getRandomSessionGUID: pick a random session of
# the given user, or of a random user when the id is not GUID-formatted.
#_userId = ''
_userId = '"e8fed737-7c65-49c8-bf84-f8ae71c094f8"'
#def getRandomSessionGUID( _userId = '' ):
rmId = _userId
if( not(isGUIDFormat(_userId))):
    rmId = getRandomRedMetricsGUID()
_uniqueSessions = getUserSessions( rmdf152, rmId )
_sessionsCount = len(_uniqueSessions)
# Defaults kept when the user has no session at all: randint(0, -1) would
# otherwise raise ValueError on the empty range.
_guid = ""
_sessionIndex = None
if _sessionsCount > 0:
    # randint is inclusive on both bounds, hence _sessionsCount-1
    _sessionIndex = randint(0,_sessionsCount-1)
    _guid = _uniqueSessions.iloc[_sessionIndex]
_guid
In [ ]:
# Inspect which user id was actually used for the session lookup.
rmId
In [ ]:
# Inspect the randomly drawn position.
_sessionIndex
In [ ]:
# Inspect how many sessions were available to draw from.
_sessionsCount
In [ ]:
# Sanity check for the single-session case: randint bounds are inclusive,
# so randint(0,0) is valid and always returns 0.
randint(0,0)
In [ ]:
# Inspect the sessions the random draw was made from.
_uniqueSessions
In [ ]:
# Call the helper again with its default arguments.
getRandomSessionGUID()
In [ ]:
# Find the earliest 'userTime' among the test user's events that have a
# non-null 'section', across all of the user's sessions.
userId = testGUID
#print('----------------------uid='+str(uid)+'----------------------')
sessions = getUserSessions(rmdf152, userId)
# Sentinel far in the future: any real event time compares lower. It is what
# gets printed when no session contains an event with a section.
firstGameTime = pd.to_datetime('2050-12-31T12:59:59.000Z', utc=True)
for session in sessions:
    #print('-----------------------------------------session='+str(session))
    timedEvents = rmdf152[rmdf152['sessionId']==session]
    timedEvents = timedEvents.dropna(subset=['section'])
    if(len(timedEvents) > 0):
        # assign() builds a new frame instead of writing into a filtered slice
        # of rmdf152, which could emit SettingWithCopyWarning. Values are still
        # parsed one by one, exactly as before.
        timedEvents = timedEvents.assign(
            userTime=timedEvents['userTime'].map(lambda t: pd.to_datetime(t, utc=True))
        )
        earliest = timedEvents['userTime'].min()
        if(earliest < firstGameTime):
            firstGameTime = earliest
    #else:
        #print('no event with section')
    #print('-----------------------------------------')
# NOTE(review): the export lost indentation; printing once after the loop is inferred.
print("firstGameTime=" + str(firstGameTime))
In [ ]:
# All rmdf152 rows whose userId equals the selected user id.
rmdf152[rmdf152['userId']==userId]
In [ ]:
# Re-query the selected user's sessions and display them.
sessions = getUserSessions(rmdf152, userId)
sessions